package com.dhgate.amazonCraw.spider;

import java.util.ArrayList;
import java.util.List;

import javax.management.JMException;

import org.apache.log4j.Logger;

import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.monitor.SpiderMonitor;
import us.codecraft.webmagic.scheduler.RedisScheduler;

import com.dhgate.amazonCraw.pageproccess.AmazonPageProcess;
import com.dhgate.amazonCraw.pipline.AmazonPipeline;
import com.dhgate.amazonCraw.redis.RedisConnManager;

/**
 * Command-line entry point that crawls Amazon search-category listing pages.
 * <p>
 * A WebMagic {@link Spider} is built around {@link AmazonPageProcess}, writes
 * its results through {@link AmazonPipeline}, and keeps its URL queue in a
 * {@link RedisScheduler} backed by the pool from
 * {@link RedisConnManager#getJedisPool()}. The crawl is seeded with one search
 * URL per entry of {@link #SEARCH_ALIASES}.
 * 
 * @author sunxu
 * 
 */
public class AmazonSpider {
	private static final Logger logger = Logger.getLogger(AmazonSpider.class);

	/** Common prefix of every seed URL; a search alias is appended to it. */
	private static final String SEARCH_URL_PREFIX = "http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias=";

	/**
	 * Search aliases (one per Amazon department) used to seed the crawl, in the
	 * order they are enqueued. An earlier revision scraped these from the
	 * options of the {@code searchDropdownBox} select element on the site
	 * directory page; they are now maintained by hand.
	 */
	private static final String[] SEARCH_ALIASES = {
			"instant-video",
			"appliances",
			"mobile-apps",
			"arts-crafts",
			"automotive",
			"baby-products",
			"beauty",
			"stripbooks",
			"mobile",
			"apparel",
			"collectibles",
			"computers",
			"electronics",
			"financial",
			"gift-cards",
			"grocery",
			"hpc",
			"garden",
			"industrial",
			"jewelry",
			"digital-text",
			"magazines",
			"movies-tv",
			"digital-music",
			"popular",
			"mi",
			"office-products",
			"lawngarden",
			"pets",
			"pantry",
			"shoes",
			"software",
			"sporting",
			"tools",
			"toys-and-games",
			"videogames",
			"watches",
			"wine"
	};

	/** Number of worker threads handed to the spider. */
	private static final int THREAD_COUNT = 5;

	/**
	 * Builds the spider, enqueues every seed URL, registers the spider with
	 * the {@link SpiderMonitor} and then runs it on the calling thread.
	 * <p>
	 * A failure to register with the monitor is logged and otherwise ignored,
	 * so the crawl still runs without monitoring.
	 */
	private static void craw() {
		Spider spider = Spider.create(new AmazonPageProcess())
				.addPipeline(new AmazonPipeline())
				.setScheduler(new RedisScheduler(RedisConnManager.getJedisPool()))
				.thread(THREAD_COUNT);

		for (String seedUrl : buildSeedUrls()) {
			spider.addUrl(seedUrl);
		}

		try {
			SpiderMonitor.instance().register(spider);
		} catch (JMException e) {
			// Monitoring is optional: report through the application log
			// (not stderr) and carry on with the crawl.
			logger.warn("Failed to register spider with SpiderMonitor; continuing without monitoring", e);
		}
		spider.run();
	}

	/**
	 * Expands {@link #SEARCH_ALIASES} into full search URLs.
	 * 
	 * @return one URL per alias, in the same order as {@link #SEARCH_ALIASES}
	 */
	private static List<String> buildSeedUrls() {
		List<String> seedUrls = new ArrayList<String>(SEARCH_ALIASES.length);
		for (String alias : SEARCH_ALIASES) {
			seedUrls.add(SEARCH_URL_PREFIX + alias);
		}
		return seedUrls;
	}

	/**
	 * Runs the crawl and logs a message once {@link #craw()} has returned.
	 * 
	 * @param args
	 *            command-line arguments; not used
	 */
	public static void main(String[] args) {
		craw();
		logger.info("amazon craw is end...");
	}
}
